## "China's Ideological Spectrum"
## by Jennifer Pan and Yiqing Xu

### Install Packages
## install.packages("lavaan")
## install.packages("doParallel","foreach")

########################################################
## CFA analysis: Searching for a better model 
########################################################

### WARNING - this part takes extremely long processing time
### Results can be directly loaded from intermediate file (line 115)

## rm(list=ls(all=TRUE)) 
## library(lavaan)

## ## load data
## load("data/sample10K.RData")
## names(d)
## table(d$year)

## ## Grouping survey questions
## questions <- paste("q", 1:50, sep = "")
## f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism
## f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market
## f3 <- c(32, 41, 42, 43, 46, 47, 48, 49, 50) # traditional values
## f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism
## f5 <- c(24, 6, 7, 8, 44, 45) # individual freedom
## f6 <- c(23, 26, 28, 35, 39) # national interest
## f7 <- c(22, 31, 33, 34, 36, 38) # social justice
## FSet <- list(f1, f2, f3, f4, f5, f6, f7)


## ## a function that generates lavaan model syntax from a list of nubmers
## genModel <- function(model.list) {
##     model <- ""
##     for (j in 1:length(model.list)) {
##         model <- paste(model,
##                        paste('factor',j,' =~ ',
##                              paste(paste('q', model.list[[j]], sep=""),
##                                    collapse = " + "), sep = ""),
##                        "\n", sep = "") 
##     }
##     return(model)
## }


## ## first, search for the best combination among the three base categories
## PSet <- matrix(NA, 0, 7)
## colnames(PSet) <- paste("assign",1:7, sep = "")

## ## 877 possibilities
## c1 <- 1
## for (c2 in 1:2) {
##     for (c3 in 1:(max(c1,c2)+1)) {
##         for (c4 in 1:(max(c1,c2,c3)+1)) {
##             for (c5 in 1:(max(c1,c2,c3,c4)+1)) {
##                 for (c6 in 1:(max(c1,c2,c3,c4,c5)+1)) {
##                     for (c7 in 1:(max(c1,c2,c3,c4,c5,c6)+1)) {
##                         PSet <- rbind(PSet, c(c1, c2, c3, c4, c5, c6, c7))
##                     }
##                 }
##             }
##         } 
##     }
## }
## results <- matrix(NA, dim(PSet)[1], 6)
## colnames(results) <- c("converged","post.check","chisq", "cfi", "tli", "rmsea")

## ## parallel computing: THIS'S GONNA TAKE A WHILE
## ## jump to line 113 to see the results

## library(doParallel)
## library(foreach)
## cl <- makeCluster(10)
## registerDoParallel(cl)
## system.time(
##     results <- foreach (i=1:dim(PSet)[1], .combine = rbind, .packages = c("lavaan"),
##                         .inorder = FALSE) %dopar% {
##                             model.list <- list()
##                             for (j in 1:7) {
##                                 if (j %in% PSet[i, ]) {
##                                     model.list <- c(model.list, list(unique(unlist(FSet[which(PSet[i, ] == j)]))))
##                                 }
##                             }
##                             suppressWarnings(fit <- cfa(genModel(model.list),
##                                                         ordered = questions,
##                                                         data=d[,questions],
##                                                         std.lv = TRUE))
##                             converged <- inspect(fit, c("converged"))
##                             if (converged == TRUE) {
##                                 post.cov <- suppressWarnings(inspect(fit, c("post.check")))
##                                 out <- c("converged" = converged, "post.cov" = post.cov,
##                                          fitMeasures(fit, c("chisq","cfi", "tli","rmsea"))) 
##                             } else {
##                                 out <- c(0, rep(NA, 5))
##                             }
##                             return(out)
##                         })
## stopCluster(cl)
## colnames(results) <- c("converged", "post.check","chisq", "cfi", "tli", "rmsea")

## ndim <- apply(PSet[,1:7],1,max)
## results.final <- cbind.data.frame(PSet, results, ndim)
## results.final$id <- 1:dim(results.final)[1]
## rownames(results.final) <- NULL
## head(results.final)

## save(results.final, file = "output/result_cfa_search.RData")


########### Show Results ###################

rm(list=ls(all=TRUE)) 
load("output/result_cfa_search.RData")
post.cov <- which(results.final$post.check == 1)
good.results <- results.final[post.cov,]
(best.id<-which.min(good.results$chisq))
which.max(good.results$cfi)
which.max(good.results$tli)
which.min(good.results$rmsea)
print(good.results[best.id, ])
## 1 2 2 3 1 2 2 

## Assignment: 1, 2, 2, 3, 1, 2, 2
## ------------------------------------
## f1 <- c(1, 2, 3, 4, 5, 10, 12, 13, 14, 17) # political liberalism
## f5 <- c(24, 6, 7, 8, 41, 44, 45, 50) # individual freedom
## ------------------------------------
## f2 <- c(21, 25, 27, 29, 30, 37, 40) # free market
## f3 <- c(32, 42, 43, 46, 47, 48, 49) # traditional values
## f6 <- c(23, 26, 28, 35, 39) # national interest
## f7 <- c(22, 31, 33, 34, 36, 38) # social justice
## ------------------------------------
## f4 <- c(9, 11, 15, 16, 18, 19, 20) # nationalism

## best models
post.cov <- which(results.final$post.check == 1)
length(post.cov) ## 92 have postive definite covariance matrix
length(post.cov)/dim(results.final)[1] ## 10% of the models
good.results <- results.final[post.cov,]

head(good.results)
table(good.results$ndim)
## 1  2  3 
## 1 39 52 


##############
## Figure 5
##############

pdf("graphs/cfa_chisq.pdf")
par(mar = c(4, 4, 1, 1))
plot(x = jitter(results.final$ndim[-post.cov]),
     y = results.final$chisq[-post.cov],
     xlim = c(0.5,7.5), ylim = c(64500,68100),
     col = "gray80", xlab = "", ylab = "", cex.axis = 1.2) 
points(jitter(results.final$ndim[setdiff(post.cov,best.results$id)]),
       results.final$chisq[setdiff(post.cov,best.results$id)],
       pch = 1, col = "gray30")
points(best.results$ndim, best.results$chisq,
       pch = 16, cex = 1.5, col = 1)
text(1, 67800, "Model C", cex = 1.2)
text(2, 65900, "Model B", cex = 1.2)
text(3, 65580, "Model A", cex = 1.2)
legend("topright", c("Valid","Invalid (neg. def. cov.)"),
       col = c("gray10","gray70"), cex = 1.3,
       pch = 1, bty = "n")
mtext("# Latent Factors", 1, 2.5, cex = 1.6)
mtext("Chi-squared", 2, 2.5, cex = 1.6)
graphics.off()

pdf("graphs/cfa_rmesa.pdf")
par(mar = c(4, 4, 1, 1))
plot(x = jitter(results.final$ndim[-post.cov]),
     y = results.final$rmsea[-post.cov],
     xlim = c(0.5,7.5), ylim = c(0.07385, 0.07560),
     col = "gray80", xlab = "", ylab = "", cex.axis = 1.2) 
points(jitter(results.final$ndim[setdiff(post.cov,best.results$id)]),
       results.final$rmsea[setdiff(post.cov,best.results$id)],
       pch = 1, col = "gray30")
points(best.results$ndim, best.results$rmsea,
       pch = 16, cex = 1.5, col = 1)
text(1, 0.0753, "Model C", cex = 1.2)
text(2, 0.07425, "Model B", cex = 1.2)
text(3, 0.07415, "Model A", cex = 1.2)
legend("topright", c("Valid","Invalid (neg. def. cov.)"),
       col = c("gray10","gray70"), cex = 1.3,
       pch = 1, bty = "n")
mtext("# Latent Factors", 1, 2.5, cex = 1.6)
mtext("RMSEA", 2, 2.5, cex = 1.6)
graphics.off()


##############
## Table 1
##############

## The best model: 3 dimensional
library(plyr)
best.results <- ddply(good.results, .(ndim), function(x){
    data.frame(x[which.min(x$chisq),])
})
(best.id<-which.min(good.results$chisq))
which.max(good.results$cfi)
which.max(good.results$tli)
which.min(good.results$rmsea)
print(good.results[best.id, ])
print(best.results[c(3,2,1),])
## p-values
1 - pchisq(301, 2)
1 - pchisq(2180, 3)

